//////////////////////////////////////////////////////////////////////////
** Set Up
//////////////////////////////////////////////////////////////////////////

* Root folder of the replication package; edit this line before running.
global path "your path"

* Sub-folders for the input data and for exported figures and tables.
global data "${path}/data"
global fig  "${path}/figures"
global tab  "${path}/tables"

* Start from an empty session and apply the plain graph style.
clear all
drop _all
grstyle init
grstyle set plain, nogrid
graph set window fontface "Times New Roman"

* Variable lists reused throughout the script.
global fe  i.strataid i.year  
global ctr i.strataid i.year female repeater i.age i.major_category
global hs  hs_German hs_math_grade hs_language_grade hs_hours  
global pt  compete open cons extr agre neur 
global ppt PMcompete PMopen PMcons PMextr PMagre PMneur
global opt PMfemale PMhs_math_grade PMhs_German PMhs_language_grade 

	
//////////////////////////////////////////////////////////////////////////////////////
** Figure 1: The Impact of Peer Personality at Baseline on Own Personality at Endline
//////////////////////////////////////////////////////////////////////////////////////	

	* Load the analysis sample and discard previously stored estimates.
	use "${data}/analysis_data", clear
	eststo clear

	* One model per trait: endline trait on the peer measure, the own
	* baseline trait and the strata/year indicators, clustered by group.
	foreach trait of global pt {
		local lbl : variable label `trait'
		label variable PM`trait' "Impact of Peer `lbl' on Own `lbl'"
		eststo F`trait': quietly regress `trait'_edl PM`trait' `trait' ${fe}, vce(cluster groupid)
	}

	* Plot the peer coefficients with 95% confidence intervals.
	coefplot F*, keep(${ppt}) level(95) grid(none) ///
		xline(0, lp(shortdash) lc(gray) lw(.2)) ///
		msize(small) legend(off) ///
		xlab(-.08(.04).12, labsize(vsmall) format(%3.2f)) ///
		ylab(, labs(small)) ///
		ciopts(lc(black)) mc(black) ///
		mlab mlabpos(12) mlabgap(1) format(%4.3f) ///
		graphregion(margin(tiny)) mlabc(black)

	graph export "${fig}/Fig1_MainResults.pdf", replace
	
	
//////////////////////////////////////////////////////////////////////////
** Figure 2: Balance, Short- and Longer-Term Effects of Peer Personality
//////////////////////////////////////////////////////////////////////////
	
* Figure 2: baseline (balance), endline and follow-up estimates of the peer
* coefficient for competitiveness, openness and conscientiousness.
use "$data/baseline_groups", clear 	

	* Leave-one-out mean of each trait within year x stratum cells:
	* (cell mean * cell size - own value) / (cell size - 1).
	foreach x in compete cons open agre neur extr {
	qui bys year strataid: egen `x'_sm=mean(`x')
	qui bys year strataid: gen `x'_n=_N 
	qui gen `x'_LOM=(`x'_sm*`x'_n-`x')/(`x'_n-1)
	}
	
	* Standardise own and peer traits with the mean and sd of observations
	* that have a non-missing endline competitiveness score, then store the
	* baseline and endline models for each trait.
	foreach x in $pt {
	qui sum `x' if !mi(compete_edl)
	qui replace `x'=(`x'-`r(mean)')/`r(sd)' 
	qui sum PM`x' if !mi(compete_edl)
	qui replace PM`x'=(PM`x'-`r(mean)')/`r(sd)'
	
	** baseline estimates 
	qui eststo `x'_bsl: qui reg `x' PM`x' `x'_LOM i.strataid#i.year if !mi(compete_edl), vce(cl groupid) 
	
	** endline estimates 
	qui sum `x'_edl if !mi(compete_edl)
	qui replace `x'_edl=(`x'_edl-`r(mean)')/`r(sd)'
	qui eststo `x'_edl: qui reg `x'_edl PM`x' $hs $pt $ctr if !mi(compete_edl), vce(cl groupid)
	}
	
	* Reshape the follow-up variables to long form (one row per id_temp and
	* follow-up suffix), convert the suffix to a calendar year and save.
	egen id_temp=group(id year)
	reshape long agre_fl neur_fl extr_fl open_fl cons_fl compete_fl , i(id_temp) j(flyr)
	replace flyr=2000+flyr
	
	* Response indicator, defined only for follow-up years after the cohort year.
	gen followup_response =(!mi(compete_fl))	if year<flyr 
	save "$data/follow_up_response", replace 
	
	
	** long-term estimates
	use "$data/follow_up_response", clear 
	keep if !mi(compete_edl) & !mi(compete_fl)
	
	foreach x in compete open cons {	
	sum `x'_fl 
	replace `x'_fl =(`x'_fl -`r(mean)')/`r(sd)'
	eststo `x'_fu: reghdfe `x'_fl PM`x' $pt $hs, a($ctr flyr) vce(cl groupid) 
 	}	
	
	* Panel 1: competitiveness (x axis hidden; shown only on the bottom panel).
 	local x compete
	coefplot `x'_bsl, bylabel("Baseline") ///
	 || `x'_edl,  bylabel(Endline) ///
	 || `x'_fu,  bylabel(Follow-Up) ///
	 || , bycoefs keep(PM`x') vertical mc(black) level(95) ///
		ciopts(recast(. rcap) lc(black)) grid(off) msize(small) ///
		ylab(, labs(vsmall) format(%3.2f) angle(0) nogrid) ///
		mlab mlabpos(3) format(%4.3f) mlabsize(small) ///
		yline(0, lc(gray) lp(shortdash)) legend(off) ///
		xlab(, labsize(small) nogrid) xsca(off) ///
		ytitle("Peer" "Competitiveness", size(small) margin(r=2)) ///
		name(`x', replace) graphregion(margin(medsmall)) ///
		fysize(40) fxsize(100) mlabc(black)
	 
	* Panel 2: openness.
 	local x open
	coefplot `x'_bsl, bylabel("Baseline") ///
	 || `x'_edl,  bylabel(Endline) ///
	 || `x'_fu,  bylabel(Follow-Up) ///
	 || , bycoefs keep(PM`x') vertical mc(black) level(95) ///
		ciopts(recast(. rcap) lc(black)) grid(off) msize(small) ///
		ylab(, labs(vsmall) format(%3.2f) angle(0) nogrid) ///
		mlab mlabpos(2) format(%4.3f) mlabsize(small) ///
		yline(0, lc(gray) lp(shortdash)) legend(off) ///
		xlab(, labsize(small) nogrid) xsca(off) ///
		ytitle("Peer" "Openness", size(small)  margin(r=2)) ///
		name(`x', replace) graphregion(margin(medsmall)) ///
		fysize(40) fxsize(100) mlabc(black)
	  
	* Panel 3: conscientiousness (keeps the x axis, hence the larger fysize).
	* The axis title previously read "consientiousness"; spelling and
	* capitalisation now match the other two panels.
	local x cons
	coefplot `x'_bsl, bylabel("Baseline") ///
	 || `x'_edl,  bylabel(Endline) ///
	 || `x'_fu,  bylabel(Follow-Up) ///
	 || , bycoefs keep(PM`x') vertical mc(black) level(95) ///
		ciopts(recast(. rcap) lc(black)) grid(off) msize(small) ///
		ylab(, labs(vsmall) format(%3.2f) angle(0) nogrid) ///
		mlab mlabpos(3) format(%4.3f) mlabsize(small) ///
		yline(0, lc(gray) lp(shortdash)) legend(off) ///
		xlab(, labsize(small) nogrid) ///
		ytitle("Peer" "Conscientiousness", size(small) margin(r=2)) ///
		name(`x', replace) graphregion(margin(medsmall)) ///
		fysize(46) fxsize(100) mlabc(black)
	 
	graph combine compete open cons, xcomm col(1) graphregion(margin(zero))  
	graph display, xsize(12) ysize(12)
	graph export "$fig/Fig2_PersistentEffects.pdf", replace		
	
	
/////////////////////////////////////////////////////////////////////////////////
** Figure 3: Impact of Peer Personality and Achievement on Educational Outcomes
/////////////////////////////////////////////////////////////////////////////////
	
* Figure 3: peer traits and peer math achievement on three outcomes.
use "${data}/analysis_data", clear

	label variable hs_math_grade "Math Achievement"
	egen Score = std(exam_score)
	egen mathG = std(hs_math_grade)
	summarize PMhs_math_grade
	replace PMhs_math_grade = (PMhs_math_grade - `r(mean)') / `r(sd)'

	* Stored models per peer characteristic:
	* E = exam attendance, S = standardised exam score, A = anxiety.
	foreach trait in compete open cons hs_math_grade {
		local lbl : variable label `trait'
		label variable PM`trait' "Impact of Peer `lbl'"
		eststo E`trait': quietly regress exam_attend PM`trait' `trait' ${ctr}, vce(cluster groupid)
		eststo S`trait': quietly regress Score PM`trait' `trait' ${ctr}, vce(cluster groupid)
		eststo A`trait': quietly regress anxiety PM`trait' `trait' ${ctr}, vce(cluster groupid)
	}

	* Panel A: exam attendance (the only panel that shows the y axis labels).
	coefplot (Ecompete, keep(PMcompete)) ///
		(Eopen, keep(PMopen)) ///
		(Econs, keep(PMcons)) ///
		(Ehs_math_grade, keep(PMhs_math_grade) m(d) ciopts(lc(black)) mc(black) mlabc(black)) ///
		, ciopts(lc(black)) mc(black) level(95) grid(none) ///
		xline(0, lw(.2) lc(gray) lp(shortdash)) msize(small) legend(off) ///
		xlab(#4, labs(vsmall) format(%3.2f)) ylab(, labs(medsmall)) ///
		mlab mlabpos(12) format(%4.3f) mlabsize(med) mlabc(black) ///
		title("A." "Exam Attendance", size(med) margin(b=5)) ///
		name(attend, replace) fysize(120) fxsize(120)

	* Panel B: standardised exam score.
	coefplot (Scompete, keep(PMcompete)) ///
		(Sopen, keep(PMopen)) ///
		(Scons, keep(PMcons)) ///
		(Shs_math_grade, keep(PMhs_math_grade) m(d) ciopts(lc(black)) mc(black) mlabc(black)) ///
		, level(95) ciopts(lc(black)) mc(black) grid(none) ///
		xline(0, lw(.2) lc(gray) lp(shortdash)) msize(small) legend(off) mlabc(black) ///
		xlab(#4, labs(vsmall) format(%3.2f)) ///
		mlab mlabpos(12) format(%4.3f) mlabsize(med) ///
		title("B." "Std. Exam Score", size(med) margin(b=5)) ///
		name(score, replace) fysize(120) fxsize(50) ysca(off)

	* Panel C: anxiety.
	coefplot (Acompete, keep(PMcompete)) ///
		(Aopen, keep(PMopen)) ///
		(Acons, keep(PMcons)) ///
		(Ahs_math_grade, keep(PMhs_math_grade) m(d) ciopts(lc(black)) mc(black) mlabc(black)) ///
		, level(95) ciopts(lc(black)) mc(black) grid(none) ///
		xline(0, lw(.2) lc(gray) lp(shortdash)) msize(small) mlabc(black) legend(off) ///
		xlab(#4, labs(vsmall) format(%3.2f)) ///
		mlab mlabpos(12) format(%4.3f) mlabsize(med) ///
		title("C." "Anxiety", size(med) margin(b=5)) ///
		name(phq, replace) fysize(120) fxsize(50) ysca(off)

	graph combine attend score phq, ycomm col(3) graphregion(margin(zero))
	graph export "${fig}/Fig3_HardOutcomes.pdf", replace
	

////////////////////////////////////
** Table 1: Descriptive Statistics
////////////////////////////////////
	 
* Table 1: descriptives for observations with an endline competitiveness score.
use "${data}/baseline_groups", clear
	keep if !missing(compete_edl)
	replace age = age + 17
	foreach trait of global pt {
		local lbl : variable label `trait'
		label variable `trait' "Baseline `lbl'"
	}

	* Panel A
	tabulate year
	tabulate study_program
	tabulate major_category

	* Panel B
	cd "${tab}"
	local sumvars female age repeater hs_math_grade hs_language_grade hs_German hs_hours ///
		compete PMcompete compete_edl ///
		open PMopen open_edl ///
		cons PMcons cons_edl ///
		extr PMextr extr_edl ///
		agre PMagre agre_edl ///
		neur PMneur neur_edl ///
		exam_attendance exam_score anxiety freq_study_peers freq_social_peers
	keep `sumvars'
	outreg2 using "descriptive", sum(log) replace excel label

	* outreg2 also writes a .txt companion file; remove it if present.
	capture erase "descriptive.txt"
	
	
/////////////////////////////////
** Table 2: Randomization Test
/////////////////////////////////
	
 	* Table 2: regress each own baseline trait on the peer mean of that trait.
	use "${data}/baseline_groups.dta", clear

	* Leave-one-out mean within year x stratum cells, built before the
	* sample is restricted to endline respondents.
	foreach trait of global pt {
		quietly bysort year strataid: egen M`trait' = mean(`trait')
		quietly bysort year strataid: gen N`trait' = _N
		quietly gen LOM`trait' = (M`trait' * N`trait' - `trait') / (N`trait' - 1)
	}

	keep if !missing(compete_edl)

	cd "${tab}"
	capture erase "RandomizationTest_PeerMean.xml"
	capture erase "RandomizationTest_PeerMean.txt"

	foreach trait of global pt {
		* standardise the peer mean first, then the own trait
		foreach v in PM`trait' `trait' {
			quietly summarize `v'
			quietly replace `v' = (`v' - `r(mean)') / `r(sd)'
		}
		quietly regress `trait' PM`trait' LOM`trait' i.strataid#i.year, vce(cluster groupid)
		outreg2 using "RandomizationTest_PeerMean.xml", excel dec(3) label nocons keep(PM*)
	}
	capture erase "RandomizationTest_PeerMean.txt"
		
		
////////////////////////////////////////////////// 	
** Table 3: Peer Personality and Own Personality 
////////////////////////////////////////////////// 

	* Table 3: five nested specifications per trait, one output file per trait.
	use "${data}/analysis_data", clear
	cd "${tab}"

	foreach trait of global pt {
		* reporting options shared by all five columns
		local cols keep(PM`trait') nocons sta(coef se pval) bra(pval) pare(se)

		* (1) own baseline trait and strata/year indicators
		quietly regress `trait'_edl PM`trait' `trait' ${fe}, vce(cluster groupid)
		outreg2 using "`trait'_edl", excel replace dec(3) label addsta(Adjusted R-squared, `e(r2_a)') `cols'

		* (2) all own baseline traits
		quietly regress `trait'_edl PM`trait' ${pt} ${fe}, vce(cluster groupid)
		outreg2 using "`trait'_edl", excel dec(3) label addsta(Adjusted R-squared, `e(r2_a)') `cols'

		* (3) plus high-school variables and the full control list
		quietly regress `trait'_edl PM`trait' ${pt} ${hs} ${ctr}, vce(cluster groupid)
		outreg2 using "`trait'_edl", excel dec(3) label addsta(Adjusted R-squared, `e(r2_a)') `cols'

		* (4) plus all peer traits
		quietly regress `trait'_edl PM`trait' ${pt} ${hs} ${ppt} ${ctr}, vce(cluster groupid)
		outreg2 using "`trait'_edl", excel dec(3) label addsta(Adjusted R-squared, `e(r2_a)') `cols'

		* (5) plus the other peer characteristics
		quietly regress `trait'_edl PM`trait' ${pt} ${hs} ${ppt} ${opt} ${ctr}, vce(cluster groupid)
		outreg2 using "`trait'_edl", excel dec(3) label addsta(Adjusted R-squared, `e(r2_a)') `cols'

		capture erase "`trait'_edl.txt"
	}
	

///////////////////////////////////////////////////////////
** Table 4: Peer Personality and Interaction Frequency
///////////////////////////////////////////////////////////
	
	* Table 4: peer traits and how often students interact with their peers.
	use "${data}/analysis_data", clear
	cd "${tab}"

	* Standardised frequencies and a standardised index of their row mean.
	egen StudyFreq   = std(freq_study_peers)
	egen SocialFreq  = std(freq_social_peers)
	egen interact    = rowmean(StudyFreq SocialFreq)
	egen Interaction = std(interact)

	* Same specification for each outcome; the first column replaces the
	* output file and the later ones append to it.
	local mode replace
	foreach dv in StudyFreq SocialFreq Interaction {
		reghdfe `dv' ${ppt} ${pt} ${hs} ${opt}, absorb(${ctr}) vce(cluster groupid)
		* joint test of the six peer traits; its p-value goes in the table
		test ${ppt}
		outreg2 using Interaction, excel dec(3) keep(${ppt}) `mode' addstat(p-val, `r(p)') nocons
		local mode
	}

	erase Interaction.txt
	
	
//////////////////////////////////////////////////////////// 
** Table 5: Multiple Testing Correction for Main Results
////////////////////////////////////////////////////////////	

	* Table 5: the fifteen main estimates and their Romano-Wolf p-values.
	use "${data}/analysis_data", clear
	cd "${tab}"

	* export the main results in a table
	* (first column replaces the file, all later columns append)
	local mode replace
	foreach trait of global pt {
		quietly reghdfe `trait'_edl PM`trait' `trait', absorb(${fe}) vce(cluster groupid)
		outreg2 using MHT, excel dec(3) `mode' nocons sta(coef pval) bra(pval) keep(PM*)
		local mode
	}

	egen score_std = std(exam_score)
	foreach dv in exam_attend score_std anxiety {
		foreach trait in compete open cons {
			quietly reghdfe `dv' PM`trait' `trait', absorb(${fe}) vce(cluster groupid)
			outreg2 using MHT, excel dec(3) nocons sta(coef pval) bra(pval) keep(PM*)
		}
	}
	capture erase "MHT.txt"

	* Romano-Wolf corrected p-values
	* (one entry in indepvars per model, in the same order as the models)
	rwolf2 ///
		(reghdfe compete_edl PMcompete compete, a(${fe}) vce(cl groupid)) ///
		(reghdfe open_edl PMopen open, a(${fe}) vce(cl groupid)) ///
		(reghdfe cons_edl PMcons cons, a(${fe}) vce(cl groupid)) ///
		(reghdfe extr_edl PMextr extr, a(${fe}) vce(cl groupid)) ///
		(reghdfe agre_edl PMagre agre, a(${fe}) vce(cl groupid)) ///
		(reghdfe neur_edl PMneur neur, a(${fe}) vce(cl groupid)) ///
		(reghdfe exam_attend PMcompete compete, a(${fe}) vce(cl groupid)) ///
		(reghdfe exam_attend PMopen open, a(${fe}) vce(cl groupid)) ///
		(reghdfe exam_attend PMcons cons, a(${fe}) vce(cl groupid)) ///
		(reghdfe exam_score PMcompete compete, a(${fe}) vce(cl groupid)) ///
		(reghdfe exam_score PMopen open, a(${fe}) vce(cl groupid)) ///
		(reghdfe exam_score PMcons cons, a(${fe}) vce(cl groupid)) ///
		(reghdfe anxiety PMcompete compete, a(${fe}) vce(cl groupid)) ///
		(reghdfe anxiety PMopen open, a(${fe}) vce(cl groupid)) ///
		(reghdfe anxiety PMcons cons, a(${fe}) vce(cl groupid)) ///
		, indepvars(PMcompete, PMopen, PMcons, PMextr, PMagre, PMneur, PMcompete, PMopen, PMcons, PMcompete, PMopen, PMcons, PMcompete, PMopen, PMcons) ///
		seed(8759) reps(10000) nodots strata(year study_program) cluster(groupid)

	** FDR sharpened q-values 
		* 1. run the following do-file
			//clear
			//do "$path/dofile/3_fdr_sharpened_qvalues"
		* 2. insert the set of p-values 
			// 0.0023
			// 0.0007
			// 0.002
			// 0.014
			// 0.0171
			// 0.0295
			// 0.0525
			// 0.1104
			// 0.3341
			// 0.4893
			// 0.5493
			// 0.7813
			// 0.7899
			// 0.8123
			// 0.8932
 

**=================================================================================**	
**================================ ONLINE APPENDIX ================================**
**=================================================================================**
	
	
//////////////////////////////////////////////////////////////////////////////
** Figure A2: The Distribution of Personality Traits at Baseline and Endline
//////////////////////////////////////////////////////////////////////////////
	
* Figure A2: baseline versus endline kernel density of each trait.
use "${data}/baseline_groups", clear
keep if !missing(compete_edl)
	foreach trait of global pt {
		local lbl : variable label `trait'
		twoway (kdensity `trait', lc(black)) ///
			(kdensity `trait'_edl, lc(gray) lp(shortdash)), ///
			legend(order(1 "Baseline" 2 "Endline") ring(0) pos(11) col(1) size(small) symx(6) nobox region(color(none))) ///
			name(`trait', replace) ///
			ytitle(Kdensity, size(small)) ///
			title("`lbl'", margin(b=3) size(med)) ///
			xtitle(" ") ///
			ylab(, angle(0) nogrid labsize(small)) ///
			xlab(0(1)10, labsize(small))
	}

	graph combine ${pt}, col(3) xsize(14) ysize(10) xcomm ycomm
	* NOTE(review): the file name below spells "Trits"; kept as is because
	* other documents may reference it. Confirm before renaming.
	graph export "${fig}/FigA2_Distribution_Trits.pdf", replace
		

/////////////////////////////////////////////////////// 
** Figure A3: Histogram of Peer Personality Measures 
/////////////////////////////////////////////////////// 
			
* Figure A3: histogram of each peer measure with a vertical line at its median.
use "${data}/baseline_groups", clear
keep if !missing(compete_edl)

	foreach trait of global pt {
		* median of the peer measure, then summaries on either side of it
		summarize PM`trait', detail
		local med `r(p50)'
		summarize PM`trait' if PM`trait' >= `med'
		summarize PM`trait' if PM`trait' < `med'

		local lbl : variable label `trait'
		histogram PM`trait', name(`trait', replace) frac ///
			xtitle("Peer `lbl'", margin(t=1) size(med)) ///
			ylab(, labsize(small) nogrid) xlab(, labsize(small)) ///
			bin(10) lc(white) fc(gray*.5) ///
			addplot(scatteri 0 `med' .3 `med', recast(line) lc(black)) ///
			legend(off)
	}
	graph combine ${pt}, col(3) ycomm xsize(15) ysize(10)
	graph export "${fig}/FigA3_Histogram_PeerTraits.pdf", replace
	
		
//////////////////////////////////////////////
** Figure A4: Convergence of Personality
//////////////////////////////////////////////

* Figure A4: endline distance to the peer mean plotted against baseline
* distance, one binned scatter per trait, annotated with the slope and the
* p-value of the test that the slope equals one.
use "$data/baseline_groups", clear 	
keep if !mi(compete_edl)	

	* Distances to the peer mean at baseline (D) and endline (D_edl), and
	* their residuals (RD) after partialling out the controls.
	foreach x in $pt {
	gen D`x'=`x'-PM`x'
	gen D`x'_edl=`x'_edl-PM`x'  
 	qui reg D`x'_edl $hs $ctr $pt
	predict RD`x'_edl, residuals 
	qui reg D`x' $hs $ctr $pt
	predict RD`x', residuals 
	}
	

 	** Test the overall slope
	* Stores, per trait, the rounded slope (suffix _b) and the rounded
	* p-value of H0: slope = 1 (suffix _p) for use in the plot annotations.
	foreach x in $pt {
	reghdfe D`x'_edl D`x' $pt $hs , a($ctr) vce(cl groupid)
	local `x'_b = round(_b[D`x'],.01)
 	test _b[D`x']==1
	local `x'_p = round(`r(p)',.001)
 	}	
	local nn 10 

	* Competitiveness: wider axis range, and the only panel with a legend
	* (grc1leg2 below takes the shared legend from a single panel).
	local x compete 
	binscatter RD`x'_edl RD`x' RD`x', n(`nn') ms(o none) mcolor(black) lcolor(black gray) ///
		xline(0, lc(gray*.5) lp(shortdash)) yline(0, lc(gray*.5) lp(shortdash)) ///
		xtitle("Baseline Distance (d{sub:0})", size(medsmall)) ///
		ytitle("Endline Distance (d{sub:1})", size(medsmall)) ///
		title("{bf:`: var lab `x''}", size(med) margin(b=2)) name(`x', replace) ///
		ylab(-2(1)2, labsize(small) nogrid) xlab(-2(1)2, labsize(small)) ///
		text(-.5 .4  "{it:b} =``x'_b'", place(e) size(small)) ///
		text(-.8 .4  "{it:{&beta}} = 1 ({it:p} = ``x'_p')", place(e) size(small)) ///
		legend(order(4 "45{sup:o} Line" 3 "Fitted Line") size(medsmall) symxsize(*.75) region(color(none)))
	
	foreach x in cons open agre neur {
	binscatter RD`x'_edl RD`x' RD`x', n(`nn') ms(o none) mcolor(black) lcolor(black gray) ///
		xline(0, lc(gray*.5) lp(shortdash)) yline(0, lc(gray*.5) lp(shortdash)) ///
		xtitle("Baseline Distance (d{sub:0})", size(medsmall)) ///
		ytitle("Endline Distance (d{sub:1})", size(medsmall)) legend(off) ///
		title("{bf:`: var lab `x''}", size(med) margin(b=2)) name(`x', replace) ///
		ylab(-1(.5)1, labsize(small) nogrid) xlab(-1(.5)1, labsize(small)) ///
		text(-.2 .2 "{it:b} =``x'_b'", place(e) size(small)) ///
		text(-.35 .2  "{it:{&beta}} = 1 ({it:p} = ``x'_p')", place(e) size(small))
	}	
	
	* Extraversion: same layout with the annotation placed lower. The
	* p-value was previously hard-coded as .688; it now uses the computed
	* value like every other panel, so it cannot go stale when data change.
	local x extr 
	binscatter RD`x'_edl RD`x' RD`x', n(`nn') ms(o none) mcolor(black) lcolor(black gray) ///
		xline(0, lc(gray*.5) lp(shortdash)) yline(0, lc(gray*.5) lp(shortdash)) ///
		xtitle("Baseline Distance (d{sub:0})", size(medsmall)) ///
		ytitle("Endline Distance (d{sub:1})", size(medsmall)) legend(off) ///
		title("{bf:`: var lab `x''}", size(med) margin(b=2)) name(`x', replace) ///
		ylab(-1(.5)1, labsize(small) nogrid) xlab(-1(.5)1, labsize(small)) ///
		text(-.3 .2 "{it:b} =``x'_b'", place(e) size(small)) ///
		text(-.55 .2  "{it:{&beta}} = 1 ({it:p} = ``x'_p')", place(e) size(small)) 
	
	grc1leg2 $pt, col(3) 
	graph display, xsize(16) ysize(12)
	graph export "$fig/FigA4_ConvergeToPeerMean.pdf", replace  


//////////////////////////////////////////////////////////////////////////
** Figure A5: Non-Linear Spillovers?
//////////////////////////////////////////////////////////////////////////
	
* Figure A5: binned scatter of own endline trait against the peer trait,
* annotated with the linear coefficient and its p-value.
use "$data/analysis_data", clear

	local n 10 
 	foreach x in $pt {
	* Linear benchmark: own baseline trait as regressor, controls absorbed.
	qui reghdfe `x'_edl PM`x' `x', a($ctr) vce(cl groupid)
	* Row 4 of r(table) holds p-values; column 1 is the first regressor,
	* i.e. the peer trait.
	local pvalue= round(el(r(table),4,1),.001)
	local b = round(_b[PM`x'],.001) 
	* The control list is the global ctr. The previous code referenced a
	* local macro of that name, which is never defined and so expanded to
	* nothing, leaving the plot without the controls used in the regression
	* above. An undefined local for significance stars was also removed from
	* the annotation; it printed nothing.
	binscatter `x'_edl PM`x', control(`x' $ctr) name(`x', replace) lc(black) mc(black) ///
		ytitle("Own `: var lab `x''", margin(r=1) size(med)) ///
		xtitle("Peer `: var lab `x''", margin(t=1) size(med)) n(`n') ///
		text(-.25 .9 "{it:b} = `b'" "{it:p}-val = `pvalue'", size(medsmall)) ///
		ysca(r(-.3 .2)) xsca(r(-2 2)) ///
		ylab(, labsize(small) nogrid) xlab(, labsize(small))
	}
 	graph combine $pt, col(3) ycomm xsize(15) ysize(10)
	graph export "$fig/FigA5_Binscatter_OwnPeerTrait_hor.pdf", replace  
	
	
//////////////////////////////////////////////////////////////////////////
** Figure A6: Heterogeneous Effects: Own Trait Below Versus Above Median		
//////////////////////////////////////////////////////////////////////////

	* Figure A6: peer effects estimated separately for students whose own
	* baseline trait is above versus not above the sample median.
	use "$data/analysis_data", clear
	
	* AM = 1 when the own trait is strictly above its median, 0 otherwise,
	* and missing when the trait is missing (a bare comparison would code
	* missing values as above the median).
	* Stored models: AM* = above-median sample, BM* = at-or-below-median.
	foreach x in $pt  {
	sum `x' , det  
	gen AM`x'=(`x'>`r(p50)') if !mi(`x')
	eststo AM`x':	qui reghdfe `x'_edl PM`x' $hs $pt if AM`x'==1, a($ctr) vce(cl groupid)  
	eststo BM`x':	qui reghdfe `x'_edl PM`x' $hs $pt if AM`x'==0, a($ctr) vce(cl groupid)  
	} 
	
	* Left panel: below-median sample, so it must plot the BM* models.
	* The two model lists were previously swapped relative to the titles.
	coefplot BMcompete BMopen BMcons , ciopts(lc(black)) mc(black) mlabc(black) ///
		keep($ppt) level(95) grid(none) xline(0, lc(gray) lw(.2) lp(shortdash)) ///
		msize(small) legend(off) ///
		xlab(-.1(.1).2, labs(small) format(%3.2f)) ylab(, labs(med)) ///
		mlab mlabpos(12) format(%4.3f) mlabsize(medsmall) ///
		title("Own Trait {bf:Below} Median", margin(b=5) size(medlarge)) ///
		name(below, replace) fxsize(90) fysize(100)  
	
	* Right panel: above-median sample (AM* models); y axis hidden.
	coefplot AMcompete AMopen AMcons, ciopts(lc(black))  mc(black) mlabc(black) ///
		keep($ppt) level(95) grid(none) xline(0, lc(gray) lw(.2) lp(shortdash)) ///
		msize(small) legend(off) yscale(off) ///
		xlab(-.1(.1).2, labs(small) format(%3.2f)) ylab(, labs(medsmall)) ///
		mlab mlabpos(12) format(%4.3f) mlabsize(medsmall) ///
		title("Own Trait {bf:Above} Median", margin(b=5) size(medlarge)) ///
		name(above, replace) fxsize(55) fysize(100) 
	
	graph combine below above , ycomm 
	graph export "$fig/FigA6_Heterogeneity_Median.pdf", replace  
			

////////////////////////////////////////////////////////////////////////// 
** Figure A7: Correlation Between Personality and Academic Achievement
//////////////////////////////////////////////////////////////////////////	
	
	* Figure A7: each trait regressed against standardised grades.
	use "${data}/analysis_data", clear

	egen hs_grade = rowmean(hs_math_grade hs_language_grade)
	egen Grade = std(grade)
	foreach v in hs_grade grade {
		summarize `v'
		replace `v' = (`v' - r(mean)) / r(sd)
	}

	* Left panel: high-school grade on each standardised baseline trait.
	eststo clear
	foreach trait of global pt {
		local lbl : variable label `trait'
		label variable `trait' "Baseline `lbl'"
		summarize `trait'
		replace `trait' = (`trait' - `r(mean)') / `r(sd)'
		eststo `trait': quietly reghdfe hs_grade `trait', absorb(year study_program) vce(r)
	}

	coefplot (compete open cons, ciopts(lc(black)) mc(black) mlabc(black)) ///
		(extr agre neur, m(d) ciopts(lc(black)) mc(black) mlabc(black)), ///
		m(o) mlab mlabpos(12) mlabsize(medsmall) mlabgap(1) keep(${pt}) ///
		grid(none) xline(0, lc(gray) lw(.2) lp(shortdash)) msize(medsmall) ///
		xlab(-.2(.1).3, labs(small) format(%3.1f)) ylab(, labs(medsmall)) ///
		format(%4.3f) ///
		title("Dep Var:" "Std. High School Grade", margin(b=5) size(med)) ///
		name(hsch, replace) fysize(100) fxsize(95) legend(off)

	* Right panel: course grade on each standardised endline trait. The
	* stored estimate names are reused, overwriting the models above.
	foreach trait of global pt {
		summarize `trait'_edl
		replace `trait'_edl = (`trait'_edl - `r(mean)') / `r(sd)'
		eststo `trait': reghdfe Grade `trait'_edl, absorb(year study_program) vce(r)
	}

	coefplot (compete open cons, ciopts(lc(black)) mc(black) mlabc(black)) ///
		(extr agre neur, m(d) ciopts(lc(black)) mc(black) mlabc(black)), ///
		m(o) mlab mlabpos(12) mlabsize(medsmall) mlabgap(1) keep(*_edl) legend(off) ///
		grid(none) xline(0, lc(gray) lp(shortdash) lw(.2)) msize(medsmall) ///
		xlab(-.3(.1).2, labs(small) format(%3.1f)) ylab(, labs(medsmall)) ///
		format(%4.3f) name(econ, replace) ///
		title("Dep Var:" "Std. Economics Grade", margin(b=5) size(med)) ///
		ysca(off) fysize(100) fxsize(50)

	graph combine hsch econ, ycomm
	graph export "${fig}/FigA7_Personality_AcademicAchievement.pdf", replace
	
	
//////////////////////////////////////////////////////////////////////////			
** Figure A8: Robustness to Dropping Extreme Values of Personality			
//////////////////////////////////////////////////////////////////////////	

	* Figure A8: main specification re-estimated after trimming own
	* baseline trait values near the ends of the scale.
	use "${data}/analysis_data", clear

	foreach trait of global pt {
		* upper trimming bounds: competitiveness is cut below 10, 9.6 and
		* 9.3; every other trait below 7, 6.6 and 6.3
		if "`trait'" == "compete" {
			local top0 10
			local top1 9.6
			local top2 9.3
		}
		else {
			local top0 7
			local top1 6.6
			local top2 6.3
		}

		local lbl : variable label `trait'
		label variable PM`trait' "Impact of Peer `lbl'"

		* O = full sample; D, E, F = progressively tighter trimming
		eststo O`trait': reghdfe `trait'_edl PM`trait' `trait', absorb(${fe}) vce(cluster groupid)
		eststo D`trait': reghdfe `trait'_edl PM`trait' `trait' if `trait'>1 & `trait'<`top0', absorb(${fe}) vce(cluster groupid)
		eststo E`trait': reghdfe `trait'_edl PM`trait' `trait' if `trait'>1.4 & `trait'<`top1', absorb(${fe}) vce(cluster groupid)
		eststo F`trait': reghdfe `trait'_edl PM`trait' `trait' if `trait'>1.7 & `trait'<`top2', absorb(${fe}) vce(cluster groupid)
	}

	* Only the O, D and E models are plotted.
	coefplot (Ocompete O*, m(o) mc(black) ) ///
		(Dcompete D*, m(d) mc(black)) ///
		(Ecompete E*, m(s) mc(black)), ///
		ciopts(lc(black)) keep(${ppt}) level(95) grid(none) ///
		xline(0, lc(gray) lw(.2) lp(shortdash)) msize(small) ///
		legend(order(2 "T{sub:0} {&isin} [min, max]" 4 "T{sub:0} {&isin} (min, max)" 6 "T{sub:0} {&isin} (min+1/3, max-1/3)" 1 "95% CI") size(small) col(2)) ///
		xlab(-.1(.05).15, labs(vsmall) format(%3.2f)) ylab(, labs(small))
	graph export "${fig}/FigA8_DropOutliers.pdf", replace
			
	
//////////////////////////////////////////////////////////////////////	
** Figure A9: Main Results in Initial Sample and Replication Sample
//////////////////////////////////////////////////////////////////////	

	* Figure A9: main specification in the pooled sample (A), in cohorts
	* before 2021 (F) and in cohorts after 2020 (P).
	use "${data}/analysis_data", clear

	foreach trait of global pt {
		gen `trait'_pm = PM`trait'

		local lbl : variable label `trait'
		label variable PM`trait' "Impact of Peer `lbl' on Own `lbl'"

		eststo A`trait': reghdfe `trait'_edl PM`trait' `trait', absorb(${fe}) vce(cluster groupid)
		eststo F`trait': reghdfe `trait'_edl PM`trait' `trait' if year<2021, absorb(${fe}) vce(cluster groupid)
		eststo P`trait': reghdfe `trait'_edl PM`trait' `trait' if year>2020, absorb(${fe}) vce(cluster groupid)
	}

	coefplot (F*,  m(t) mc(black)) ///
		(P*,m(d) mc(black)) ///
		(A*,  m(o) mc(black)) , ///
		ciopts(recast(rcap .) lc(black black) lw(.2 .2)) ///
		keep(${ppt}) level(90 95) grid(none) ///
		xline(0, lc(gray) lp(shortdash) lw(.2)) msize(small) ///
		legend( span order(3 "Initial sample (2018-2020)" 6  "Replication sample (2021-2023)" 9 "Pooled sample (2018-2023)" 1 "90% CI"  2 "95% CI") symx(*.5) size(small) col(3) region(col(none)) ) ///
		xlab(-.1(.05).15, labs(vsmall) format(%3.2f) notick) ///
		ylab(, labs(small) notick) graphregion(margin(zero))
	graph display, xsize(15) ysize(11)
	graph export "${fig}/FigA9_Replication.pdf", replace

		
//////////////////////////////////////////////////////////////////////////		
** Figure A10: Course Grade for Students With or Without Study Groups	
//////////////////////////////////////////////////////////////////////////

	* Figure A10: standardised course grade by sign-up status, raw and
	* after residualising on two sets of covariates.
	use "${data}/baseline_all", clear
	egen grade_std = std(grade_final)

	* graph options shared by the three bar charts
	local common `"xlab(1 "Without a group" 2 "With a group") ylab(, nogrid) legend(off) yline(0, lc(gray))"'

	* (A) raw standardised grade
	cibar grade_std, over(signup) barc(black black*.8) ///
		graphopt(ytitle("Std. Course Grade") `common' name(raw, replace) title("(A)", size(med))) ///
		baropt(barw(.75))

	* (B) residualised on year and study programme
	quietly regress grade_std i.year i.study_program
	predict rg1, residuals

	ttest rg1, by(signup)
	cibar rg1, over(signup) barc(black black*.8) ///
		graphopt(ytitle("Std. Course Grade (Residualized)") `common' name(rg1, replace) title("(B)", size(med))) ///
		baropt(barw(.75))

	* (C) residualised on the extended covariate set
	quietly regress grade_std i.year i.study_program female ${hs} ${pt} repeater i.major_category i.age
	predict rg2, residuals
	ttest rg2, by(signup)

	cibar rg2, over(signup) barc(black black*.8) ///
		graphopt(ytitle("Std. Course Grade (Residualized)") `common' name(rg2, replace) title("(C)", size(med))) ///
		baropt(barw(.75))

	graph combine raw rg1 rg2, col(3) ycomm
	graph display, xsize(16) ysize(10)
	graph export "${fig}/FigA10_Having_a_group.pdf", replace
			

//////////////////////////////////////////////////////////////////////////
** Table A1: Baseline Characteristics by Group Registration
//////////////////////////////////////////////////////////////////////////
	
	* Table A1: baseline characteristics by sign-up status, plus the
	* p-value of a two-sample t-test for each characteristic.
	use "${data}/baseline_all", clear
	cd "${tab}"
	drop if missing(compete)

	* characteristics reported in the table
	local chars ${pt} hs_math_grade hs_language_grade female

	* summary statistics for the non-registered
	preserve
		keep if !signup
		keep `chars'
		outreg2 using "NotRegistered", sum(log) replace excel label sortvar(`chars')
	restore

	* summary statistics for the registered
	preserve
		keep if signup
		keep `chars'
		outreg2 using "Registered", sum(log) replace excel label sortvar(`chars')
	restore

	foreach v of local chars {
		quietly ttest `v', by(signup)
		display %4.3f `r(p)'
	}

	erase "Registered.txt"
	erase "NotRegistered.txt"
		
		
//////////////////////////////////////////////////////////////////////////
** Table A2: Test for Balancing and Selective Attrition 
//////////////////////////////////////////////////////////////////////////
	
	* Panel A: balance regressions in the full baseline sample
	use "${data}/baseline_groups.dta", clear
	cd "${tab}"

	capture erase "RandomizationTest_PeerMean_Full.xml"
	capture erase "RandomizationTest_PeerMean_Full.txt"

	foreach trait of global pt {
		* leave-one-out mean within year x stratum cells
		quietly bysort year strataid: egen M`trait' = mean(`trait')
		quietly bysort year strataid: gen N`trait' = _N
		quietly gen LOM`trait' = (M`trait' * N`trait' - `trait') / (N`trait' - 1)

		* standardise the peer mean first, then the own trait
		foreach v in PM`trait' `trait' {
			quietly summarize `v'
			quietly replace `v' = (`v' - `r(mean)') / `r(sd)'
		}

		quietly reghdfe `trait' PM`trait' LOM`trait', absorb(strataid#year) vce(cluster groupid)
		outreg2 using "RandomizationTest_PeerMean_Full.xml", excel dec(3) label nocons keep(PM*)
	}
	capture erase "RandomizationTest_PeerMean_Full.txt"

	* Panel B: does the peer trait predict having an endline response?
	capture erase "EndlineSurvey_PeerTrait.xml"
	capture erase "EndlineSurvey_PeerTrait.txt"
	gen EndSurvey = (!missing(compete_edl))

	foreach trait of global pt {
		quietly summarize PM`trait'
		quietly replace PM`trait' = (PM`trait' - r(mean)) / r(sd)

		quietly regress EndSurvey PM`trait' i.strataid#i.year, vce(cluster groupid)
		outreg2 using "EndlineSurvey_PeerTrait", excel dec(3) label nocons keep(PM*)
	}

	capture erase "EndlineSurvey_PeerTrait.txt"
		

/////////////////////////////////////////////////////////////// 	
** Table A3: Pairwise Correlations Between Personality Traits	 
/////////////////////////////////////////////////////////////// 	
	
	* Pairwise correlations among the six baseline traits, with p-values.
	use "${data}/baseline_groups", clear
	pwcorr ${pt}, sig
	
		
/////////////////////////////////////////////////////////////// 	
** Table A4: Cronbach's Alpha Coefficient of Reliability
/////////////////////////////////////////////////////////////// 	

	* Table A4: Cronbach's alpha for each three-item scale.
	use "${data}/personality_raw", clear

	* items of each three-item scale
	local items_cons cons_thorough cons_lazy cons_efficient
	local items_open open_original open_artistic open_imagine
	local items_agre agre_rude agre_forgive agre_kind
	local items_extr extr_talkative extr_outgoing extr_reserved
	local items_neur neur_worry neur_nervous neur_relaxed

	* at baseline and endline
	* (endline items carry the suffix _edl)
	foreach scale in cons open agre extr neur {
		alpha `items_`scale''

		local endline
		foreach item of local items_`scale' {
			local endline `endline' `item'_edl
		}
		alpha `endline'
	}

	* at follow-up
	clear
	append using "${data}/followup_2021" "${data}/followup_2022"

	* six-item versions of the two scales that have them
	alpha cons_thorough cons_persevere cons_lazy cons_efficient cons_organized cons_plan
	alpha open_inventive open_original open_curious open_artistic open_ideas open_imagine

	* three-item versions
	foreach scale in cons open agre neur extr {
		alpha `items_`scale''
	}


/////////////////////////////////////////////////////////////////////////////
** Table A5: Stability of Personality – Within-Trait Correlation Across Time
/////////////////////////////////////////////////////////////////////////////
	
	* Correlation of each baseline trait with the same trait measured at
	* endline (edl) and in the two follow-up waves (fl21, fl22).
	use "${data}/analysis_data", clear
	foreach wave in edl fl21 fl22 {
		foreach trait of global pt {
			pwcorr `trait' `trait'_`wave', sig
		}
	}

				
//////////////////////////////////////////////////////////////////////////	
** Table A6: Main Results Excluding the COVID-19 Cohort (2020/21)	
//////////////////////////////////////////////////////////////////////////

	* Table A6: the five main specifications without the 2020 cohort.
	use "${data}/analysis_data", clear
	cd "${tab}"
	drop if year==2020

	foreach trait of global pt {
		* reporting options shared by all five columns
		local cols keep(PM`trait') nocons sta(coef se pval) bra(pval) pare(se)

		* (1) own baseline trait and strata/year indicators
		quietly regress `trait'_edl PM`trait' `trait' ${fe}, vce(cluster groupid)
		outreg2 using "`trait'_edl_NoCovid", excel replace dec(3) label `cols'

		* (2) all own baseline traits
		quietly regress `trait'_edl PM`trait' ${pt} ${fe}, vce(cluster groupid)
		outreg2 using "`trait'_edl_NoCovid", excel dec(3) label `cols'

		* (3) plus high-school variables and the full control list
		quietly regress `trait'_edl PM`trait' ${pt} ${hs} ${ctr}, vce(cluster groupid)
		outreg2 using "`trait'_edl_NoCovid", excel dec(3) label `cols'

		* (4) plus all peer traits
		quietly regress `trait'_edl PM`trait' ${pt} ${hs} ${ppt} ${ctr}, vce(cluster groupid)
		outreg2 using "`trait'_edl_NoCovid", excel dec(3) label `cols'

		* (5) plus the other peer characteristics
		quietly regress `trait'_edl PM`trait' ${pt} ${hs} ${ppt} ${opt} ${ctr}, vce(cluster groupid)
		outreg2 using "`trait'_edl_NoCovid", excel dec(3) label `cols'

		capture erase "`trait'_edl_NoCovid.txt"
	}
	
	
/////////////////////////////////////////// 
** Table A7: Leuven Test of Randomization
///////////////////////////////////////////

	use "$data/baseline_groups.dta", clear 

	* Variables whose allocation across groups is tested
	local balvars $pt female hs_math_grade hs_language_grade hs_hours

	* Pass 1: residualise each variable on strata-by-year cells, regress the
	* residual on group dummies and print the F statistic of their joint test.
	foreach v of local balvars {
		qui reg `v' i.strataid#i.year
		qui predict R`v' if e(sample), residuals

		qui reg R`v' i.groupid
		qui testparm i.groupid
		dis `r(F)'
	}

	* Pass 2: same residual regressions, this time printing the p-value, so
	* that all F statistics and all p-values appear as two separate lists.
	foreach v of local balvars {
		qui reg R`v' i.groupid
		qui testparm i.groupid
		dis  `r(p)'
	}	
			
	
///////////////////////////////////////////////////////////////////////////////////////////	
** Table A8: Balance Test: Joint Effects of Peer Personality on Own Personality at Baseline	
///////////////////////////////////////////////////////////////////////////////////////////	

	use "$data/baseline_groups.dta", clear 
	cd "$tab"
	
	* Leave-one-out mean of each trait within year-by-strata cells:
	* (cell mean * cell size - own value) / (cell size - 1).
	* NOTE(review): N`y' counts every row in the cell while the egen mean skips
	* missing values - confirm the traits have no missings at this point.
	foreach y in $pt {
		qui bys year strataid: egen M`y'=mean(`y') 
		qui bys year strataid: gen N`y'=_N 
		qui gen LOM`y' = (M`y'*N`y'-`y')/(N`y'-1) 
	}		
	
	* Restrict to observations with a non-missing compete_edl
	keep if !mi(compete_edl)
	* outreg2 is called without -replace- below, so start from a clean file
	capture erase "BalanceTest_Joint.xml" 
	capture erase "BalanceTest_Joint.txt" 
	
	* Standardise ALL peer measures and own traits before any regression is run.
	* Every regression below includes the full $ppt set and exports keep(PM*);
	* standardising inside the regression loop would leave the peer variables
	* of later traits in raw units in the earlier columns, making the reported
	* coefficients non-comparable across columns. The joint F test is invariant
	* to this rescaling, so F statistics and p-values are unaffected.
	foreach y in $pt {
		qui sum PM`y' 
		qui replace PM`y'=(PM`y'-`r(mean)')/`r(sd)'
		qui sum `y'
		qui replace `y'=(`y'-`r(mean)')/`r(sd)'
	}
	
	* One column per trait: regress own trait on all peer traits, the
	* leave-one-out mean and strata-by-year cells, then jointly test the peers.
	foreach y in $pt {
		qui reg `y' $ppt LOM`y' i.strataid#i.year, vce(cl groupid)
		test $ppt 
		local fsta: display %4.3f `r(F)'
		local pval: display %4.3f `r(p)' 
		outreg2 using "BalanceTest_Joint.xml", excel dec(3) label nocons keep(PM*) addsta("F-statistic", `fsta', "p-value", `pval')
	}		
	
		
//////////////////////////////////////////////////////////////////////////
** Table A9: Full Spillover Matrix with Multiple Testing Correction
//////////////////////////////////////////////////////////////////////////
	
	use "$data/analysis_data", clear
	cd "$tab"
	
	* Start from a clean output file: outreg2 is called without -replace- below
	capture erase "Fullimpact.xml"
	capture erase "Fullimpact.txt"		

	* One column per outcome: endline trait on all peer traits plus controls,
	* followed by a joint test of the peer traits (printed to the log).
	foreach trait of global pt {
		qui reg `trait'_edl $ppt $pt $hs $opt $ctr, vce(cl groupid) 
		outreg2 using "Fullimpact", excel dec(3) label keep($ppt) nocons  stats(coef se pval)  bracket(pval) paren(se)  
		test $ppt
	}	
	capture erase "Fullimpact.txt"	
	
	** Multiple hypothesis testing correction via rwolf2
	* Right-hand side shared by every equation; the comma opens the reghdfe
	* option list, so the controls in $ctr are absorbed rather than estimated.
	local rhs $pt $hs $ppt $opt, a($ctr) 

	* One reghdfe equation per trait, in the order of $pt (which matches the
	* order of the peer variables in $ppt).
	local models
	foreach trait of global pt {
		local models `"`models' (reghdfe `trait'_edl PM`trait' `rhs' vce(cl groupid) )"'
	}

	rwolf2 `models' ///
	, indepvars($ppt, $ppt, $ppt, $ppt, $ppt, $ppt) seed(8759) reps(10000) nodots cluster(groupid) strata(year study_program)
	

//////////////////////////////////////////////////////////////////////////	
** Table A10: Impact of Peer Personality on Follow-Up Survey Response	
//////////////////////////////////////////////////////////////////////////	
	
	use "$data/follow_up_response", clear 
	cd "$tab"
	
	* Start from a clean output file: outreg2 is called without -replace- below
	capture erase FLresponse.xml 
	capture erase FLresponse.txt 

	* One column per trait: follow-up response on the standardised peer trait.
	* NOTE(review): the absorb term uses -strata- while the rest of the file
	* uses -strataid-; confirm this dataset really names the variable -strata-
	* (or that abbreviation is intended).
	foreach trait of global pt {
		local peer PM`trait'_std
		egen `peer' = std(PM`trait')
		lab var `peer' "Peer `:var lab `trait''"
		qui reghdfe followup_response `peer' , a(i.strata#i.year flyr) vce(cl groupid) 
		outreg2 using FLresponse, excel dec(3) nocons nonotes keep(PM*) label 
	}

	* outreg2 also leaves a .txt companion file; remove it
	capture erase FLresponse.txt 
				
		
	
///////////////////////////////////////////////////////////////////////////////	
** Figure D1: Personality Traits of Our Student Sample versus SOEP Respondents
///////////////////////////////////////////////////////////////////////////////	
	
	* Note: the original SOEP data can be acquired from SOEP 
	
	* Plot the summary statistics: one panel per trait, one marker per sample,
	* with a capped range of mean +/- 1.96*sd around each marker.
	use "$data/big_five_summary_statistics", clear 
	gen ub = mean + 1.96*sd  
	gen lb = mean - 1.96*sd 

	* Numeric x-position for each sample. egen group() numbers the values of
	* -sample- in sorted order; the positions are then reversed (1<->4, 2<->3).
	* The full variable name is used in -replace- so the line does not depend
	* on variable-name abbreviation (it would fail under "set varabbrev off").
	* NOTE(review): the x-axis labels below are hard-coded, so they are only
	* correct if the sorted order of -sample- is the reverse of the label order
	* - confirm against the data.
	egen setting=group(sample)
	replace setting=5-setting
	
	foreach x in Openness Conscientiousness	Extraversion	Agreeableness	Neuroticism	{
	* Positions 1-3 get dashed range caps, position 4 a solid one
	twoway (rcap ub lb setting if trait=="`x'" & setting==1, lc(black) lp(dash))  ///
	(rcap ub lb setting if trait=="`x'" & setting==2, lc(black) lp(dash))  ///
	(rcap ub lb setting if trait=="`x'" & setting==3, lc(black) lp(dash))  ///
	(rcap ub lb setting if trait=="`x'" & setting==4, lc(black) )  ///
	(scatter mean setting if trait=="`x'"& setting==1, color(black)) ///
	(scatter mean setting if trait=="`x'"& setting==2, color(black)) ///
	(scatter mean setting if trait=="`x'"& setting==3, color(black)) ///
	(scatter mean setting if trait=="`x'"& setting==4, color(black)) ///
	, title("{bf:`x'}", size(medlarge) margin(b=2)) ///
	ylab(1(1)7, labsize(medsmall)) ///
	xlab(1 "SOEP: All" 2 "SOEP: 18-22" 3 "Students:  All" 4 "Students: Analysis", angle(20) labsize(medsmall)) ///
	xsca(r(.5 4.5)) legend(off) xtitle(" ") name(`x', replace) ///
	graphregion(margin(l=2 r=2 u=0 b=0)) ytitle("")
	}

	* Assemble the five named panels into one figure and export it
	graph combine Openness Conscientiousness Extraversion Agreeableness Neuroticism, col(3) ysize(10) xsize(14) graphregion(margin(zero))   
	graph export "$fig/FigD1_BigFiveComparison.pdf", replace 
	
	






